1 Set up

Load packages and set parameters

if (!require("librarian")){
  install.packages("librarian")
  library(librarian)
}
librarian::shelf(assertthat, BiocManager, tidyverse, gridExtra, here, mapview, 
  prioritizr, prioritizrdata, raster, remotes, rgeos, rgdal, scales, sf, sp, 
  units)
if (!require("lpsymphony")){
  BiocManager::install("lpsymphony")
  library(lpsymphony)
}

options(scipen = 999)

Data setup

dir_data <- here("data/prioritizr")
pu_shp   <- file.path(dir_data, "pu.shp")
pu_url   <- "https://github.com/prioritizr/massey-workshop/raw/main/data.zip"
pu_zip   <- file.path(dir_data, basename(pu_url))
vegetation_tif <- file.path(dir_data, "vegetation.tif")

dir.create(dir_data, showWarnings = F, recursive = T)
if (!file.exists(pu_shp)){
  download.file(pu_url, pu_zip)
  unzip(pu_zip, exdir = dir_data)
  dir_unzip   <- file.path(dir_data, "data")
  files_unzip <- list.files(dir_unzip, full.names = T)
  file.rename(
    files_unzip, 
    files_unzip %>% str_replace("prioritizr/data", "prioritizr"))
  unlink(c(pu_zip, dir_unzip), recursive = T)
}

2 Data

Data import

# import planning unit data
pu_data <- as(read_sf(pu_shp), "Spatial")

# format columns in planning unit data
pu_data$locked_in <- as.logical(pu_data$locked_in)
pu_data$locked_out <- as.logical(pu_data$locked_out)

# import vegetation data
veg_data <- stack(vegetation_tif)

2.1 Planning unit data

Print data summary

# print a short summary of the data
print(pu_data)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 5
## names       :   id,             cost, status, locked_in, locked_out 
## min values  :  557, 3.59717531470679,      0,         0,          0 
## max values  : 1130,  47.238336402701,      2,         1,          1

Plot data

# plot the planning unit data
plot(pu_data)

# plot an interactive map of the planning unit data
mapview(pu_data)

Print the structure of object

str(pu_data, max.level = 2)
## Formal class 'SpatialPolygonsDataFrame' [package "sp"] with 5 slots
##   ..@ data       :'data.frame':  516 obs. of  5 variables:
##   ..@ polygons   :List of 516
##   ..@ plotOrder  : int [1:516] 69 104 1 122 157 190 4 221 17 140 ...
##   ..@ bbox       : num [1:2, 1:2] 348703 5167775 611932 5344516
##   .. ..- attr(*, "dimnames")=List of 2
##   ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot

Print the class of the object

class(pu_data)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"

Print the slots of the object

slotNames(pu_data)
## [1] "data"        "polygons"    "plotOrder"   "bbox"        "proj4string"

Print the coordinate reference system

print(pu_data@proj4string)
## CRS arguments:
##  +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs

Print number of planning units (geometries) in the data

nrow(pu_data)
## [1] 516

Print the first six rows in the data

head(pu_data@data)
##    id     cost status locked_in locked_out
## 1 557 29.74225      0     FALSE      FALSE
## 2 558 29.87703      0     FALSE      FALSE
## 3 574 28.60687      0     FALSE      FALSE
## 4 575 30.83416      0     FALSE      FALSE
## 5 576 38.75511      0     FALSE      FALSE
## 6 577 38.11618      2      TRUE      FALSE

Print the first six values in the cost column of the attribute data

head(pu_data$cost)
## [1] 29.74225 29.87703 28.60687 30.83416 38.75511 38.11618

Print the highest cost value

max(pu_data$cost)
## [1] 47.23834

Print the smallest cost value

min(pu_data$cost)
## [1] 3.597175

Print average cost value

mean(pu_data$cost)
## [1] 26.87393

Plot planning unit cost data

# plot a map of the planning unit cost data
spplot(pu_data, "cost")

# plot an interactive map of the planning unit cost data
mapview(pu_data, zcol = "cost")

Question 1: How many planning units are in the planning unit data?
Answer: There are 516 planning units in the unit data.

Question 2: What is the highest cost value?
Answer: The highest cost value is $47.24 (Australian dollars).

Question 3: Is there a spatial pattern in the planning unit cost values?
Answer: Yes, the planning units with the highest cost tend to be in the northern most central part of Tasmania, followed by the whole western half of the region.

2.2 Vegetation data

Print summary of vegetation data

# print a short summary of the data
print(veg_data)
## class      : RasterStack 
## dimensions : 164, 326, 53464, 32  (nrow, ncol, ncell, nlayers)
## resolution : 967, 1020  (x, y)
## extent     : 298636.7, 613878.7, 5167756, 5335036  (xmin, xmax, ymin, ymax)
## crs        : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## names      : vegetation.1, vegetation.2, vegetation.3, vegetation.4, vegetation.5, vegetation.6, vegetation.7, vegetation.8, vegetation.9, vegetation.10, vegetation.11, vegetation.12, vegetation.13, vegetation.14, vegetation.15, ... 
## min values :            0,            0,            0,            0,            0,            0,            0,            0,            0,             0,             0,             0,             0,             0,             0, ... 
## max values :            1,            1,            1,            1,            1,            1,            1,            1,            1,             1,             1,             1,             1,             1,             1, ...

Plot 20th vegetation class

# plot a map of the 20th vegetation class
plot(veg_data[[20]])

# plot an interactive map of the 20th vegetation class
mapview(veg_data[[20]])

Print number of rows in the data

nrow(veg_data)
## [1] 164

Print number of columns in the data

ncol(veg_data)
## [1] 326

Print number of cells in the data

ncell(veg_data)
## [1] 53464

Print number of layers in the data

nlayers(veg_data)
## [1] 32

Print resolution on the x-axis

xres(veg_data)
## [1] 967

Print resolution on the y-axis

yres(veg_data)
## [1] 1020

Print spatial extent of the grid, i.e. coordinates for corners

extent(veg_data)
## class      : Extent 
## xmin       : 298636.7 
## xmax       : 613878.7 
## ymin       : 5167756 
## ymax       : 5335036

Print the coordinate reference system

print(veg_data@crs)
## CRS arguments:
##  +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs

Print a summary of the first layer in the stack

print(veg_data[[1]])
## class      : RasterLayer 
## band       : 1  (of  32  bands)
## dimensions : 164, 326, 53464  (nrow, ncol, ncell)
## resolution : 967, 1020  (x, y)
## extent     : 298636.7, 613878.7, 5167756, 5335036  (xmin, xmax, ymin, ymax)
## crs        : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## source     : vegetation.tif 
## names      : vegetation.1 
## values     : 0, 1  (min, max)

Print the value in the 800th cell in the first layer of the stack

print(veg_data[[1]][800])
##   
## 0

Print value of cell in 30th row and 60th column of 1st layer

print(veg_data[[1]][30, 60])
##   
## 0

Calculate the sum of all the cell values in the first layer

cellStats(veg_data[[1]], "sum")
## [1] 17

Calculate the maximum value of all the cell values in the first layer

cellStats(veg_data[[1]], "max")
## [1] 1

Calculate the minimum value of all the cell values in the first layer

cellStats(veg_data[[1]], "min")
## [1] 0

Calculate the mean value of all the cell values in the first layer

cellStats(veg_data[[1]], "mean")
## [1] 0.00035239

Question 4: What part of the study area is the 13th vegetation class found in?
Answer: The 13th vegetation class is found in the eastern part of the studio area with the most density in the north-eastern part of the study area.

# plot a map of the 13th vegetation class
plot(veg_data[[13]])

Question 5: What proportion of cells contain the 12th vegetation class?

veg_data_freq12 <- freq(veg_data[[12]])

Answer: Of the total cells in the raster (53464), 819 cells contain the 12th vegetation class, making the proportion of the cells that contain the vegetation class 1.53%.

Question 6: Which vegetation class is the most abundant (i.e. present in the greatest number of cells)?

prop_df <- data.frame()

for (i in 1:nlayers(veg_data)){
 prop <- freq(veg_data[[i]])[[2, 2]] / ncell(veg_data)
 prop_df <- rbind(prop_df, prop)
}

prop_df <- rownames_to_column(prop_df) %>% 
  rename(proportion = X0.000317970971120754)

prop_highest <- prop_df %>% filter(proportion == max(proportion))

Answer: The vegetation class that is most abundant is 12.

3 Gap analysis

3.1 Feature abundance

Create and calculate prioritizr problem

# create `prioritizr` problem with only the data
p0 <- problem(pu_data, veg_data, cost_column = "cost")

# print empty problem,
# we can see that only the cost and feature data are defined
print(p0)

# calculate amount of each feature in each planning unit
abundance_data <- feature_abundances(p0)

# print abundance data
print(abundance_data)
## # A tibble: 32 × 3
##    feature       absolute_abundance relative_abundance
##    <chr>                      <dbl>              <dbl>
##  1 vegetation.1                16.0                  1
##  2 vegetation.2                14.3                  1
##  3 vegetation.3                10.4                  1
##  4 vegetation.4                17.8                  1
##  5 vegetation.5                13.0                  1
##  6 vegetation.6                14.3                  1
##  7 vegetation.7                20.0                  1
##  8 vegetation.8                14.0                  1
##  9 vegetation.9                18.0                  1
## 10 vegetation.10               20.0                  1
## # … with 22 more rows
# note that only the first ten rows are printed,
# this is because the abundance_data object is a tibble (i.e. tbl_df) object
# and not a standard data.frame object
print(class(abundance_data))
## [1] "tbl_df"     "tbl"        "data.frame"

Print all rows in abundance_data

print(abundance_data, n = Inf)
## # A tibble: 32 × 3
##    feature       absolute_abundance relative_abundance
##    <chr>                      <dbl>              <dbl>
##  1 vegetation.1                16.0                  1
##  2 vegetation.2                14.3                  1
##  3 vegetation.3                10.4                  1
##  4 vegetation.4                17.8                  1
##  5 vegetation.5                13.0                  1
##  6 vegetation.6                14.3                  1
##  7 vegetation.7                20.0                  1
##  8 vegetation.8                14.0                  1
##  9 vegetation.9                18.0                  1
## 10 vegetation.10               20.0                  1
## 11 vegetation.11               23.6                  1
## 12 vegetation.12              748.                   1
## 13 vegetation.13              126.                   1
## 14 vegetation.14               10.5                  1
## 15 vegetation.15               17.5                  1
## 16 vegetation.16               15.0                  1
## 17 vegetation.17              213.                   1
## 18 vegetation.18               14.3                  1
## 19 vegetation.19               17.1                  1
## 20 vegetation.20               21.4                  1
## 21 vegetation.21               18.6                  1
## 22 vegetation.22              297.                   1
## 23 vegetation.23               20.3                  1
## 24 vegetation.24              165.                   1
## 25 vegetation.25              716.                   1
## 26 vegetation.26               24.0                  1
## 27 vegetation.27               18.8                  1
## 28 vegetation.28               17.5                  1
## 29 vegetation.29               24.7                  1
## 30 vegetation.30               59.0                  1
## 31 vegetation.31               60.0                  1
## 32 vegetation.32               32                    1

Add feature abundance row and print abundance_data

# add new column with feature abundances in km^2
abundance_data$absolute_abundance_km2 <-
  (abundance_data$absolute_abundance * prod(res(veg_data))) %>%
  set_units(m^2) %>%
  set_units(km^2)

# print abundance data
print(abundance_data)
## # A tibble: 32 × 4
##    feature       absolute_abundance relative_abundance absolute_abundance_km2
##    <chr>                      <dbl>              <dbl>                 [km^2]
##  1 vegetation.1                16.0                  1                   15.8
##  2 vegetation.2                14.3                  1                   14.1
##  3 vegetation.3                10.4                  1                   10.2
##  4 vegetation.4                17.8                  1                   17.6
##  5 vegetation.5                13.0                  1                   12.8
##  6 vegetation.6                14.3                  1                   14.1
##  7 vegetation.7                20.0                  1                   19.7
##  8 vegetation.8                14.0                  1                   13.9
##  9 vegetation.9                18.0                  1                   17.8
## 10 vegetation.10               20.0                  1                   19.7
## # … with 22 more rows

Calculate the average abundance of the features

mean(abundance_data$absolute_abundance_km2)
## 86.82948 [km^2]

Plot histogram of the features’ abundances

hist(abundance_data$absolute_abundance_km2, main = "Feature abundances")

Find the abundance of the feature with the largest abundance

max(abundance_data$absolute_abundance_km2)
## 737.982 [km^2]

Find the name of the feature with the largest abundance

abundance_data$feature[which.max(abundance_data$absolute_abundance_km2)]
## [1] "vegetation.12"

Question 7: What is the median abundance of the features?
Answer: The median abundance from the abundance dataset is 19.12 \(km^2\).

Question 8: What is the name of the feature with smallest abundance?
Answer: The feature with the smallest abundance is vegetation.3.

Question 9: How many features have a total abundance greater than 100 km^2?
Answer: There are 6 features with a total abundance greater than 100 \(km^2\).

3.2 Feature representation

Create column for protected / not, print new data

# create column in planning unit data with binary values (zeros and ones)
# indicating if a planning unit is covered by protected areas or not
pu_data$pa_status <- as.numeric(pu_data$locked_in)

# calculate feature representation by protected areas
repr_data <- eval_feature_representation_summary(p0, pu_data[, "pa_status"])

# print feature representation data
print(repr_data)
## # A tibble: 32 × 5
##    summary feature       total_amount absolute_held relative_held
##    <chr>   <chr>                <dbl>         <dbl>         <dbl>
##  1 overall vegetation.1          16.0         0            0     
##  2 overall vegetation.2          14.3         0            0     
##  3 overall vegetation.3          10.4         0            0     
##  4 overall vegetation.4          17.8         0            0     
##  5 overall vegetation.5          13.0         0            0     
##  6 overall vegetation.6          14.3         0            0     
##  7 overall vegetation.7          20.0         0            0     
##  8 overall vegetation.8          14.0         0            0     
##  9 overall vegetation.9          18.0         0.846        0.0470
## 10 overall vegetation.10         20.0         0            0     
## # … with 22 more rows

Add column converting to \(km^2\) and print data

# add new column with the areas represented in km^2
repr_data$absolute_held_km2 <-
  (repr_data$absolute_held * prod(res(veg_data))) %>%
  set_units(m^2) %>%
  set_units(km^2)

# print representation data
print(repr_data)
## # A tibble: 32 × 6
##    summary feature       total_amount absolute_held relative_held absolute_held_k…
##    <chr>   <chr>                <dbl>         <dbl>         <dbl>           [km^2]
##  1 overall vegetation.1          16.0         0            0                 0    
##  2 overall vegetation.2          14.3         0            0                 0    
##  3 overall vegetation.3          10.4         0            0                 0    
##  4 overall vegetation.4          17.8         0            0                 0    
##  5 overall vegetation.5          13.0         0            0                 0    
##  6 overall vegetation.6          14.3         0            0                 0    
##  7 overall vegetation.7          20.0         0            0                 0    
##  8 overall vegetation.8          14.0         0            0                 0    
##  9 overall vegetation.9          18.0         0.846        0.0470            0.834
## 10 overall vegetation.10         20.0         0            0                 0    
## # … with 22 more rows

Question 10: What is the average proportion of the features held in protected areas?
Answer: The average proportion of the features held in protected areas is 0.2415.`

Question 11: If we set a target of 10% coverage by protected areas, how many features fail to meet this target?
Answer: There are 17 features that fail to meet the target of 10% coverage by protected areas.

Question 12: If we set a target of 20% coverage by protected areas, how many features fail to meet this target?
Answer: There are 18 features that fail to meet the target of 10% coverage by protected areas.

Question 13: Is there a relationship between the total abundance of a feature and how well it is represented by protected areas?
Answer: There does not appear to be a relationship between the total abundance of a feature and how well it is represented by protected areas.

plot(abundance_data$absolute_abundance ~ repr_data$relative_held)

4 Spatial prioritizations

4.1 Starting out simple

Print planning unit data

print(pu_data)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 6
## names       :   id,             cost, status, locked_in, locked_out, pa_status 
## min values  :  557, 3.59717531470679,      0,         0,          0,         0 
## max values  : 1130,  47.238336402701,      2,         1,          1,         1

Make and print prioritizr problem with minimum number of constraints.

# make prioritization problem
p1_rds <- file.path(dir_data, "p1.rds")
if (!file.exists(p1_rds)){
  p1 <- problem(pu_data, veg_data, cost_column = "cost") %>%
        add_min_set_objective() %>%
        add_relative_targets(0.05) %>% # 5% representation targets
        add_binary_decisions() %>%
        add_lpsymphony_solver()
  saveRDS(p1, p1_rds)
}
p1 <- readRDS(p1_rds)

# print problem
print(p1)

Solve and print prioritizr solution that will meet the targets for our biodiversity features for minimum cost.

# solve problem
s1 <- solve(p1)

# print solution, the solution_1 column contains the solution values
# indicating if a planning unit is (1) selected or (0) not
print(s1)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 7
## names       :   id,             cost, status, locked_in, locked_out, pa_status, solution_1 
## min values  :  557, 3.59717531470679,      0,         0,          0,         0,          0 
## max values  : 1130,  47.238336402701,      2,         1,          1,         1,          1

Calculate number of planning units selected in the prioritization

eval_n_summary(p1, s1[, "solution_1"])
## # A tibble: 1 × 2
##   summary  cost
##   <chr>   <dbl>
## 1 overall    15

Calculate total cost of the prioritization

eval_cost_summary(p1, s1[, "solution_1"])
## # A tibble: 1 × 2
##   summary  cost
##   <chr>   <dbl>
## 1 overall  385.

Plot solution

# selected = green, not selected = grey
spplot(s1, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s1",
       colorkey = FALSE)

Question 14: How many planing units were selected in the prioritization? What proportion of planning units were selected in the prioritization?
Answer: There were 15 planning units selected in the prioritization. There are a total of 516 planning units, this means that 2.9069767% of the planning units were selected.

Question 15 : Is there a pattern in the spatial distribution of the priority areas?
Answer: The priority areas appear to be spread out fairly evenly throughout the region with no spatial pattern.

Question 16: Can you verify that all of the targets were met in the prioritization?
Answer: The lowest value in s1 for column relative_held is 0.0533. This is above 5%, meaning that all of the targets were met in the prioritization.

4.2 Adding complexity

Plot locked_in data

# TRUE = blue, FALSE = grey
spplot(pu_data, "locked_in", col.regions = c("grey80", "darkblue"),
       main = "locked_in", colorkey = FALSE)

Make, solve, and print solution of prioritizr problem with more constraints. Create problem that locks in planing units that are already covered by protected areas.

# make prioritization problem
p2_rds <- file.path(dir_data, "p2.rds")
if (!file.exists(p2_rds)){
  p2 <- problem(pu_data, veg_data, cost_column = "cost") %>%
      add_min_set_objective() %>%
      add_relative_targets(0.05) %>%
      add_locked_in_constraints("locked_in") %>%
      add_binary_decisions() %>%
      add_lpsymphony_solver()
  saveRDS(p2, p2_rds)
}
p2 <- readRDS(p2_rds)
print(p2)

# solve problem
s2 <- solve(p2)
print(s2)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 7
## names       :   id,             cost, status, locked_in, locked_out, pa_status, solution_1 
## min values  :  557, 3.59717531470679,      0,         0,          0,         0,          0 
## max values  : 1130,  47.238336402701,      2,         1,          1,         1,          1

Plot solution

# selected = green, not selected = grey
spplot(s2, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s2",
       colorkey = FALSE)

Let’s pretend that we talked to an expert on the vegetation communities in our study system and they recommended that a 10% target was needed for each vegetation class. So, equipped with this information, let’s set the targets to 10%.

Make, solve, and print solution of prioritizr problem with 10% target.

# make prioritization problem
p3_rds <- file.path(dir_data, "p3.rds")
if (!file.exists(p3_rds)){
  p3 <- problem(pu_data, veg_data, cost_column = "cost") %>%
    add_min_set_objective() %>%
    add_relative_targets(0.1) %>%
    add_locked_in_constraints("locked_in") %>%
    add_binary_decisions() %>%
    add_lpsymphony_solver()
  saveRDS(p3, p3_rds)
}
p3 <- readRDS(p3_rds)
print(p3)

# solve problem
s3 <- solve(p3)
print(s3)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 7
## names       :   id,             cost, status, locked_in, locked_out, pa_status, solution_1 
## min values  :  557, 3.59717531470679,      0,         0,          0,         0,          0 
## max values  : 1130,  47.238336402701,      2,         1,          1,         1,          1

Plot solution

# selected = green, not selected = grey
spplot(s3, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s3",
       colorkey = FALSE)

Next, let’s lock out highly degraded areas. Similar to before, this information is present in our planning unit data so we can use the locked_out column name to achieve this.

Plot locked out data

# TRUE = red, FALSE = grey
spplot(pu_data, "locked_out", col.regions = c("grey80", "darkred"),
       main = "locked_out", colorkey = FALSE)

Make, solve, and print solution of prioritizr problem with more constraints.

# make prioritization problem
p4_rds <- file.path(dir_data, "p4.rds")
if (!file.exists(p4_rds)){
  p4 <- problem(pu_data, veg_data, cost_column = "cost") %>%
    add_min_set_objective() %>%
    add_relative_targets(0.1) %>%
    add_locked_in_constraints("locked_in") %>%
    add_locked_out_constraints("locked_out") %>%
    add_binary_decisions() %>%
    add_lpsymphony_solver()
  saveRDS(p4, p4_rds)
}
p4 <- readRDS(p4_rds)

# print problem
print(p4)

# solve problem
s4 <- solve(p4)
print(s4)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 7
## names       :   id,             cost, status, locked_in, locked_out, pa_status, solution_1 
## min values  :  557, 3.59717531470679,      0,         0,          0,         0,          0 
## max values  : 1130,  47.238336402701,      2,         1,          1,         1,          1

Plot solution

# selected = green, not selected = grey
spplot(s4, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s4",
       colorkey = FALSE)

Question 17: What is the cost of the planning units selected in s2, s3, and s4?
Answer: The cost of the planning units selected in s2 is $6600.09. The cost of the planning units selected in s3 is $6669.91. And the costs of the planning units selected in s4 is $6711.58.

Question 18: How many planning units are in s2, s3, and s4?
Answer: There are 205 planning units in s2, 211 planning units in s3, and 212 planning units in s4.

Question 19: Do the solutions with more planning units have a greater cost? Why (or why not)?
Answer: Yes, the solutions with more planning units have a greater costs. This makes sense because the solution aims to select the lowest cost it can. As we add additional constraints, the solution is forced to choose more costly units in order to stay within those constraints.

Question 20: Why does the first solution (s1) cost less than the second solution with protected areas locked into the solution (s2)?
Answer: The first solution costs less than the second solution because there are fewer planning units included in the first solutions. This is because the second solution has the additional constraint of including all planning units that are already covered by a protected area (i.e. “locked-in”).

Question 21: Why does the third solution (s3) cost less than the fourth solution solution with highly degraded areas locked out (s4)?
Answer: The third solution costs less than the fourth solution because for the forth solution we have added a second constraint of excluding all “degraded” (i.e. "locked-out) planning units. Some of the degraded units must have a lower costs than the non-degraded units, so the overall cost of the solution goes up when we force the solution to exclude the degraded units.

4.3 Penalizing fragmentation

Make, solve, and print solution of prioritizr problem with added penalties to penalize fragmentation.

# make prioritization problem
p5_rds <- file.path(dir_data, "p5.rds")
if (!file.exists(p5_rds) | TRUE){
  p5 <- problem(pu_data, veg_data, cost_column = "cost") %>%
    add_min_set_objective() %>%
    add_boundary_penalties(penalty = 0.001) %>%
    add_relative_targets(0.1) %>%
    add_locked_in_constraints("locked_in") %>%
    add_locked_out_constraints("locked_out") %>%
    add_binary_decisions() %>%
    add_lpsymphony_solver()
  saveRDS(p5, p5_rds)
}
p5 <- readRDS(p5_rds)

# print problem
print(p5)

# solve problem,
# note this will take a bit longer than the previous runs
s5 <- solve(p5)

# print solution
print(s5)
## class       : SpatialPolygonsDataFrame 
## features    : 516 
## extent      : 348703.2, 611932.4, 5167775, 5344516  (xmin, xmax, ymin, ymax)
## crs         : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs 
## variables   : 7
## names       :   id,             cost, status, locked_in, locked_out, pa_status, solution_1 
## min values  :  557, 3.59717531470679,      0,         0,          0,         0,          0 
## max values  : 1130,  47.238336402701,      2,         1,          1,         1,          1

Plot solution

# selected = green, not selected = grey
spplot(s5, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s5",
       colorkey = FALSE)

Question 22: What is the cost the fourth (s4) and fifth (s5) solutions? Why does the fifth solution (s5) cost more than the fourth (s4) solution?
Answer: The cost of the planning units selected in s4 is $6711.58 and the cost of the planning units selected in s5 is $6747.59. The fifth solution costs more than the fourth solutions because we have added yet another constraint (pick planning units that are more clustered together), which forces the solution to pick planning units with a higher cost than in the fourth solution.

Question 23: Try setting the penalty value to 0.000000001 (i.e. 1e-9) instead of 0.001. What is the cost of the solution now? Is it different from the fourth solution (s4) (hint: try plotting the solutions to visualize them)? Is this a useful penalty value? Why (or why not)?
Create, solve, and plot prioritizr problem with penalty value of 0.000000001

# make prioritization problem
p5_rds <- file.path(dir_data, "p5.rds")
if (!file.exists(p5_rds) | TRUE){
  p5 <- problem(pu_data, veg_data, cost_column = "cost") %>%
    add_min_set_objective() %>%
    add_boundary_penalties(penalty = 0.000000001) %>%
    add_relative_targets(0.1) %>%
    add_locked_in_constraints("locked_in") %>%
    add_locked_out_constraints("locked_out") %>%
    add_binary_decisions() %>%
    add_lpsymphony_solver()
  saveRDS(p5, p5_rds)
}
p5 <- readRDS(p5_rds)

# solve problem
s5 <- solve(p5)

# plot solution
# selected = green, not selected = grey
spplot(s5, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s5",
       colorkey = FALSE)

Answer: The cost of the new penalty solution is $6711.76, which is very close to to the cost of the fourth solution ($6711.58). When plotted the maps also look similar (with only very slight differences). Because this solution is so similar to s4, I do not think this is a helpful penalty value.

Question 24: Try setting the penalty value to 0.5. What is the cost of the solution now? Is it different from the fourth solution (s4)? Is this a useful penalty value? Why (or why not)?
Create, solve, and plot prioritizr problem with penalty value of 0.5

# make prioritization problem
p5_rds <- file.path(dir_data, "p5.rds")
if (!file.exists(p5_rds) | TRUE){
  p5 <- problem(pu_data, veg_data, cost_column = "cost") %>%
    add_min_set_objective() %>%
    add_boundary_penalties(penalty = 0.5) %>%
    add_relative_targets(0.1) %>%
    add_locked_in_constraints("locked_in") %>%
    add_locked_out_constraints("locked_out") %>%
    add_binary_decisions() %>%
    add_lpsymphony_solver()
  saveRDS(p5, p5_rds)
}
p5 <- readRDS(p5_rds)

# solve problem,
# note this will take a bit longer than the previous runs
s5 <- solve(p5)

# plot solution
# selected = green, not selected = grey
spplot(s5, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s5",
       colorkey = FALSE)

Answer: The cost of the new penalty solution is $9816.64, which is higher then the cost of s4 solution ($6711.58). This is also not a useful penalty value because it forces all planning units to be clustered together, which is probably more clustering than is actually necessary and therefore a more costly solution than we would like.